
* Working directory: every relative data\ path below is resolved from here.
cd "D:\Dropbox\book_welfare\replication\"



**************************
* Create National Book Award data
**************************
* Load the scraped award pages and reshape them to one row per listed book.
import excel using "data\nationalbook_annual.xlsx", sheet("Sheet1") firstrow clear

	* First pass: cut info at the WINNER marker and at each
	* More about this book link; only the second piece is kept, as winner.
	split info, parse("WINNER" "More about this book")
	rename info2 winner
	drop info? info??

	* Second pass: cut info again at FINALISTS and at each link. The first two
	* pieces are discarded and the remaining pieces stay as info3, info4, ...
	split info, parse("FINALISTS" "More about this book >")
	drop info info1 info2

	* Park the winner text in slot 0 so it reshapes together with the others.
	rename winner info0

	* The URL is cut at awards- and at /?cat= ; the third piece is read as
	* the year and the fourth piece is used as the category.
	split StartingPageURL, parse("awards-" "/?cat=")
	generate year = real(StartingPageURL3)
	rename StartingPageURL4 category

	keep ID year category info*
	drop if year == .

	* Long format: num is the slot index, and slot 0 flags the winner.
	reshape long info, i(ID category year) j(num)
	generate winner = (num == 0)

	* Remove empty slots and pieces that contain both prev and next
	* (presumably page-navigation text rather than a book - TODO confirm).
	drop if info == "" | (strpos(info, "prev") > 0 & strpos(info, "next") > 0)
	* Break each scraped entry into its lines and pick out title and author.
	*
	* split takes its parse() strings literally, so parse(char(10)) would look
	* for the text char(10) instead of a newline. The newline is therefore
	* swapped for a sentinel first. The sentinel is a multi-character token
	* that should not occur in real text; a bare capital X would also cut
	* titles and names that contain an X and shift every later position.
	gen x = subinstr(info, char(10), "@@NL@@", .)
	split x, parse("@@NL@@")

	* The fallback positions must exist even when no entry has that many
	* lines; create them empty if split did not produce them.
	foreach v in x11 x12 {
		capture confirm variable `v'
		if _rc gen `v' = ""
	}

	* Title is read from line 7 and author from line 10. A piece of length 1
	* (presumably a lone carriage return, char(13) - TODO confirm) is treated
	* as blank, and the next candidate line is used instead.
	replace x7 = x9 if length(x7) == 1 & length(x9) > 1
	replace x10 = x11 if length(x10) == 1 & length(x11) > 1
	replace x10 = x12 if length(x10) == 1 & length(x11) == 1 & length(x12) > 1

	gen title = x7
	gen author = x10
	* Strip carriage returns so author can be used as a merge key later.
	replace author = subinstr(author, char(13), "", .)

	keep title author winner category year
	 
	 
	* Build the merge key: the first piece that split returns for author,
	* upper-cased. (An empty parse string is presumably treated like the
	* default, i.e. split on spaces - TODO confirm.)
	split author, parse("")
	generate name = upper(author1)
	drop author1
	drop if name == ""

	* Attach mshare from the name-gender file; rows that exist only in the
	* using file are not kept.
	merge m:1 name using data\name_gender_wipo.dta, keep(master match) nogenerate

	* x counts rows, dmatch flags rows with a non-missing mshare, and
	* femshare is the complement of mshare.
	generate x = 1
	generate dmatch = (mshare != .)
	generate femshare = 1 - mshare

	replace name = subinstr(name, char(13), "", .)
	drop if name == ""

	* Overlay hand-coded genders, then aggregate to one row per year-category.

	* Build a lookup of author -> xfemshare from the hand-match workbook
	* without disturbing the data in memory.
	preserve
		import excel data\hand_match_name_gender.xlsx, sheet("nba") firstrow clear
		keep author male
		* xfemshare is 1 when male is 0, 0 when male is 1, missing otherwise.
		gen xfemshare = 1 - male if inlist(male, 0, 1)
		keep author xfemshare
		tempfile fem
		save `fem'
	restore

	* Rows that exist only in the hand-match file are not kept.
	merge m:1 author using `fem', keep(master match) nogenerate

	* A hand-coded value overrides the name-based femshare. dmatch is switched
	* on for those rows as well, so that Nname counts every row that
	* contributes to Nf; otherwise Nf could include authors missing from Nname.
	replace dmatch = 1 if xfemshare != .
	replace femshare = xfemshare if xfemshare != .
	drop xfemshare

	* N = listed books, Nname = books with a gender value, Nf = summed femshare.
	collapse (sum) N=x Nname=dmatch Nf=femshare, by(year category)
	save data\nba.dta, replace